Load protein abundance data, pre-process them into a format suitable for network analysis, and clean the data by removing obvious outlier samples as well as proteins and samples with excessive numbers of missing entries.
The expression data is contained in the file:
Files grouped by samples, not share same proteins. This is one important point to discuss. One aporximation is consider missing protein with 0 abundance. Also it can be considered only the shared proetins.
Table with phenotype is:
datatable_jm<-function(x,column=NULL){
library(DT)
datatable(
x,
extensions = 'Buttons',
filter = list(position = 'top', clear = T),
options = list(dom = 'Blfrtip',
scrollX = TRUE,
scrollY = T,
scrollCollapse = TRUE,
# buttons = list(list(extend = 'colvis')),
buttons = list(list(extend = 'colvis'),'copy', 'print',
list(extend = 'collection',
buttons = c('csv', 'excel', 'pdf'),
text = 'Download')),
columnDefs = list(list(visible=FALSE, targets=column))))
}library(readxl)
# Load the WGCNA package
library(WGCNA)
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE)
# Read in the female liver data set
pheno <- read.csv("20240118-data(1).csv")
pheno$Group_num <- pheno$Group
pheno$Group_num <- gsub("NaCl", 1, pheno$Group_num)
pheno$Group_num <- gsub("PRP", 0, pheno$Group_num)
pheno$Group_num<-as.numeric(pheno$Group_num)
datatable_jm(pheno)Variable of interest Group. To performe WGCNA analysis needs to be in numeric format. Moduls with posstitive correaltion means that are associated with Group_num= 1, NaCl
# load("RESULTATS/OBJECTES/data_abundance")
data_abundance_raw <- read_xlsx("./Gener/1-14_A1_A2_A3_C1_C2_C3_precol2cm_col75cm_top15_70000_3e6_50_35000_5e4_100_iw4_excl40_2h_200nlmin_1ul_AvsC_abundances_indiv(1).xlsx")
dim(data_abundance_raw)## [1] 1183 114
#dim(data_abundance_raw)
data_abundance_raw_1 <- (data_abundance_raw)[grep("Abundances [(]Normalized)", colnames(data_abundance_raw))]
#dim(data_abundance_raw_1)
# data_abundance_raw_1 Aquesta matriu la fare servir per el WGCNA
data_abundance_raw_1 <- data.frame(data_abundance_raw_1)
rownames(data_abundance_raw_1) <- data_abundance_raw$Accession
# Arreglar els noms de les mostres
colnames(data_abundance_raw_1) <- gsub("Abundances..Normalized...", "", colnames(data_abundance_raw_1))
colnames(data_abundance_raw_1) <- gsub("..precipitation..EVs..n.a..1..1..", "", colnames(data_abundance_raw_1))
colnames(data_abundance_raw_1) <- gsub("..precipitation..EVs..n.a..1..2..", "", colnames(data_abundance_raw_1))
colnames(data_abundance_raw_1) <- gsub("..precipitation..EVs..n.a..2..1..", "", colnames(data_abundance_raw_1))
colnames(data_abundance_raw_1) <- gsub("..precipitation..EVs..n.a..2..2..", "", colnames(data_abundance_raw_1))
colnames(data_abundance_raw_1) <- gsub("..precipitation..EVs..n.a..3..1..", "", colnames(data_abundance_raw_1))
colnames(data_abundance_raw_1) <- gsub("..precipitation..EVs..n.a..3..2..", "", colnames(data_abundance_raw_1))
colnames(data_abundance_raw_1) <- gsub(".Sample..", "", colnames(data_abundance_raw_1))
colnames(data_abundance_raw_1) <- gsub("[.]", "_", colnames(data_abundance_raw_1))
#dim(data_abundance_raw_1)
data_abundance_raw_1 <- data_abundance_raw_1[, -grep("F65", colnames(data_abundance_raw_1))]
#dim(data_abundance_raw_1)
data_abundance_raw_1 <- data_abundance_raw_1[, -grep("F66", colnames(data_abundance_raw_1))]
#dim(data_abundance_raw_1)
data_abundance_raw_1 <- data_abundance_raw_1[, -grep("F75", colnames(data_abundance_raw_1))]
#dim(data_abundance_raw_1)
data_abundance_raw_1 <- data_abundance_raw_1[, -grep("76", colnames(data_abundance_raw_1))]
#dim(data_abundance_raw_1)
pheno <- data.frame(name = colnames(data_abundance_raw_1))
pheno$Sample <- NA
pheno$Group <- NA
pheno <- read.csv("20240118-data(1).csv")
pheno <- pheno[-grep("F65", pheno$name), ]
pheno <- pheno[-grep("F66", pheno$name), ]
pheno <- pheno[-grep("F75", pheno$name), ]
pheno <- pheno[-grep("F76", pheno$name), ]
pheno$Sample_ID <- unlist(lapply(pheno$name, function(x) strsplit(x, "_")[[1]][1]))
#dim(pheno)
library(reshape2)
data_abundance_raw_2<-data_abundance_raw_1
data_abundance_raw_2$protein<-rownames(data_abundance_raw_2)
as<-melt(data_abundance_raw_2)
library(dplyr)
as<-merge(as,pheno,by.x="variable",by.y="name",all.x = T)
as<-
as %>%group_by(protein,Sample) %>%
mutate(value_mean=mean(value))
as<-as %>% distinct(Sample,protein,.keep_all = T)
as<-as[,colnames(as)%in%c("variable","Sample","protein","value_mean")]
data_abundance_raw_3 <- dcast(as,protein ~ variable , value.var="value_mean")
rownames(data_abundance_raw_3)<-data_abundance_raw_3$protein
data_abundance_raw_3<-data_abundance_raw_3[,-1]
# colnames(data_abundance_raw_3)<-unique(as$Sample)
# Hi ha NA. Substituir per 0.01
#table(is.na(data_abundance_raw_3))
data_abundance_raw_3[is.na(data_abundance_raw_3)] <- 0.01
datExpr0 <- as.data.frame(data_abundance_raw_3)
rownames(datExpr0) <- rownames(datExpr0)
# datExpr0<-datExpr0[,-1]
datExpr0 <- t(datExpr0)
#dim(datExpr0)
rownames(datExpr0)<-unique(as$Sample)The expression data set contains 6 samples. Note that each row corresponds to a protein and column to a sample.
We first check for proteins and samples with too many missing values:
## Flagging genes and samples with too many missing values...
## ..step 1
## ..Excluding 21 genes from the calculation due to too many missing samples or zero variance.
## ..step 2
## [1] FALSE
if (!gsg$allOK) {
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes) > 0) {
printFlush(paste("Removing genes:", paste(colnames(datExpr0)[!gsg$goodGenes], collapse = ", ")))
}
if (sum(!gsg$goodSamples) > 0) {
printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")))
}
# Remove the offending genes and samples from the data:
datExpr0 <- datExpr0[gsg$goodSamples, gsg$goodGenes]
}## Removing genes: O15173, P06493, P08670, P0CG38, P0CG39, P0DPH8
## P0DPH7, P11217, P12036, P50993, P62745, P68133, Q12840, Q5T0T0, Q5VST9, Q6A162, Q6ZU15, Q7Z4I7, Q8NF91, Q9NYU2, Q9P2T1, Q9Y4G6
Next we cluster the samples (in contrast to clustering proteins that will come later) to see if there are any obvious outliers.
## null device
## 1
#dim(datExpr0)
sampleTree <- hclust(dist(datExpr0), method = "average")
par(cex = 0.6)
par(mar = c(0, 4, 2, 0))
plot(sampleTree,
main = "Sample clustering to detect outliers", sub = "", xlab = "", cex.lab = 1.5,
cex.axis = 1.5, cex.main = 2
)Les mostres A2 (F65 i F66 ) i la C1 (F75 i F76) ja estan eliminades.
# Create datTraits
allTraits <- data.frame(pheno)
nameSamples <- rownames(datExpr0)
datTraits <- data.frame(name = nameSamples)
# PL-EV1 -> llista de plaquetes ->NaCL
datTraits$nom_uni <- datTraits$name
# datTraits$nom_uni[grep("Lisat.Plaquetes",datTraits$name)]<-"NaCl"
traitRows <- match(nameSamples, allTraits$Sample)
datTraits <- allTraits[traitRows, ]
rownames(datTraits) <- allTraits[traitRows, 1]
collectGarbage()We now have the expression data in the variable datExpr, and the corresponding clinical traits in the variable datTraits.
Before we continue with network construction and module detection, we visualize how the clinical traits relate to the sample dendrogram
# Re-cluster samples
sampleTree2 <- hclust(dist(datExpr0), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
datTraits$Group_num <- datTraits$Group
datTraits$Group_num <- gsub("NaCl", 1, datTraits$Group_num)
datTraits$Group_num <- gsub("PRP", 0, datTraits$Group_num)
traitColors <- numbers2colors(as.numeric(datTraits$Group_num), signed = FALSE)
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datTraits),
main = "Sample dendrogram and trait heatmap"
)Groups are very different (between other things, are not share proteins).
library(sva)
save(datExpr0,file = "./matriu_unificada")
datExpr0_norm <-
ComBat(
t(datExpr0),
batch = c(rep("ap", 3), rep("PL", (3))),
mod = NULL,
par.prior = TRUE,
prior.plots = FALSE,
mean.only = FALSE,
ref.batch = NULL,
BPPARAM = bpparam("SerialParam")
)
datExpr0_norm <- t(datExpr0_norm)Before we continue with network construction and module detection, we visualize how the clinical traits relate to the sample dendrogram
# Re-cluster samples
sampleTree2_norm <- hclust(dist(datExpr0_norm), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
datTraits$Group_num <- datTraits$Group
datTraits$Group_num <- gsub("NaCl", 1, datTraits$Group_num)
datTraits$Group_num <- gsub("PRP", 0, datTraits$Group_num)
traitColors <- numbers2colors(as.numeric(datTraits$Group_num), signed = FALSE)
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree2_norm, traitColors,
groupLabels = names(datTraits),
main = "Sample dendrogram and trait heatmap"
)Constructing a weighted gene network entails the choice of the soft thresholding power β to which co-expression similarity is raised to calculate adjacency [1]. The authors of [1] have proposed to choose the soft thresholding power based on the criterion of approximate scale-free topology. We refer the reader to that work for more details; here we illustrate the use of the function pickSoftThreshold that performs the analysis of network topology and aids the user in choosing a proper soft-thresholding power.
# Choose a set of soft-thresholding powers
powers = c(c(1:50), seq(from = 52, to=100, by=2))
# Call the network topology analysis function
sft = pickSoftThreshold(datExpr0,
networkType = "unsigned",
powerVector = powers, verbose = 5)## pickSoftThreshold: will use block size 1162.
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 1162 of 1162
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.7630 3.4400 0.94300 539.00 553.00 699.0
## 2 2 0.6180 1.3600 0.72500 343.00 342.00 513.0
## 3 3 0.3530 0.4860 0.30600 251.00 236.00 421.0
## 4 4 0.0270 0.0882 -0.19000 199.00 177.00 367.0
## 5 5 0.0435 -0.1130 -0.16900 165.00 139.00 331.0
## 6 6 0.1950 -0.2470 0.00691 141.00 114.00 304.0
## 7 7 0.3070 -0.3520 0.11800 124.00 98.20 283.0
## 8 8 0.3950 -0.4290 0.22800 111.00 88.20 266.0
## 9 9 0.4440 -0.4840 0.28600 99.90 78.60 251.0
## 10 10 0.5070 -0.5500 0.36600 91.10 70.20 239.0
## 11 11 0.5590 -0.5860 0.43300 83.80 63.00 228.0
## 12 12 0.6120 -0.6180 0.50100 77.60 57.00 219.0
## 13 13 0.6230 -0.6480 0.51700 72.30 52.10 210.0
## 14 14 0.6420 -0.6800 0.54400 67.60 47.70 202.0
## 15 15 0.6600 -0.7060 0.57200 63.50 44.20 195.0
## 16 16 0.7030 -0.7270 0.62800 59.90 41.00 189.0
## 17 17 0.7430 -0.7530 0.68100 56.60 38.00 183.0
## 18 18 0.7650 -0.7750 0.70900 53.70 35.50 178.0
## 19 19 0.7740 -0.7870 0.72100 51.10 33.10 173.0
## 20 20 0.7910 -0.8100 0.74700 48.70 30.90 168.0
## 21 21 0.7950 -0.8200 0.75600 46.50 28.80 164.0
## 22 22 0.7970 -0.8280 0.75900 44.50 27.20 160.0
## 23 23 0.8060 -0.8410 0.76700 42.60 25.70 156.0
## 24 24 0.8110 -0.8540 0.76700 40.90 24.20 152.0
## 25 25 0.8280 -0.8590 0.79000 39.30 23.10 149.0
## 26 26 0.8340 -0.8640 0.79700 37.90 22.00 146.0
## 27 27 0.8400 -0.8720 0.80400 36.50 21.10 142.0
## 28 28 0.8440 -0.8730 0.81000 35.20 20.30 140.0
## 29 29 0.8490 -0.8840 0.81900 34.00 19.40 137.0
## 30 30 0.8590 -0.8890 0.83100 32.90 18.60 134.0
## 31 31 0.8650 -0.8960 0.83900 31.80 18.00 131.0
## 32 32 0.8670 -0.9020 0.84200 30.80 17.20 129.0
## 33 33 0.8700 -0.9120 0.84800 29.90 16.50 126.0
## 34 34 0.8710 -0.9200 0.84800 29.00 15.90 124.0
## 35 35 0.8800 -0.9310 0.85800 28.20 15.40 122.0
## 36 36 0.8900 -0.9350 0.87000 27.40 14.90 120.0
## 37 37 0.8960 -0.9390 0.87700 26.60 14.40 118.0
## 38 38 0.9010 -0.9490 0.88300 25.90 14.00 116.0
## 39 39 0.9070 -0.9550 0.89300 25.20 13.50 114.0
## 40 40 0.9190 -0.9550 0.90700 24.60 13.00 112.0
## 41 41 0.9150 -0.9610 0.90200 23.90 12.60 110.0
## 42 42 0.9200 -0.9640 0.90700 23.40 12.20 109.0
## 43 43 0.9250 -0.9710 0.91500 22.80 11.80 107.0
## 44 44 0.9300 -0.9750 0.92000 22.20 11.50 105.0
## 45 45 0.9340 -0.9780 0.92500 21.70 11.20 104.0
## 46 46 0.9330 -0.9830 0.92400 21.20 10.80 102.0
## 47 47 0.9270 -0.9930 0.91700 20.70 10.50 101.0
## 48 48 0.9250 -0.9950 0.91400 20.30 10.20 99.3
## 49 49 0.9330 -1.0000 0.92400 19.80 9.91 98.0
## 50 50 0.9360 -1.0100 0.92900 19.40 9.61 96.6
## 51 52 0.9440 -1.0100 0.93800 18.60 9.20 94.0
## 52 54 0.9460 -1.0200 0.94000 17.90 8.73 91.6
## 53 56 0.9470 -1.0200 0.94000 17.20 8.33 89.3
## 54 58 0.9490 -1.0300 0.94300 16.50 7.95 87.1
## 55 60 0.9440 -1.0400 0.93700 15.90 7.60 85.0
## 56 62 0.9420 -1.0500 0.93400 15.40 7.32 83.0
## 57 64 0.9440 -1.0500 0.93500 14.80 7.01 81.0
## 58 66 0.9420 -1.0600 0.93100 14.30 6.70 79.2
## 59 68 0.9420 -1.0700 0.93000 13.90 6.39 77.5
## 60 70 0.9520 -1.0700 0.94200 13.40 6.08 75.8
## 61 72 0.9430 -1.0700 0.92900 13.00 5.84 74.3
## 62 74 0.9480 -1.0800 0.93600 12.60 5.61 72.8
## 63 76 0.9420 -1.0800 0.92700 12.20 5.40 71.4
## 64 78 0.9420 -1.0900 0.92600 11.90 5.22 70.1
## 65 80 0.9440 -1.0900 0.93000 11.50 5.01 68.8
## 66 82 0.9550 -1.1000 0.94300 11.20 4.83 67.6
## 67 84 0.9510 -1.1000 0.93700 10.90 4.62 66.4
## 68 86 0.9340 -1.1100 0.91600 10.60 4.45 65.2
## 69 88 0.9290 -1.1200 0.90900 10.30 4.29 64.1
## 70 90 0.9210 -1.1200 0.89800 10.10 4.15 63.1
## 71 92 0.9320 -1.1100 0.91200 9.82 4.02 62.0
## 72 94 0.9260 -1.1200 0.90600 9.58 3.88 61.0
## 73 96 0.9160 -1.1200 0.89200 9.35 3.76 60.1
## 74 98 0.9100 -1.1300 0.88500 9.12 3.64 59.1
## 75 100 0.9180 -1.1300 0.89500 8.91 3.52 58.2
# Plot the results:
sizeGrWindow(9, 5)
par(mfrow = c(1,2));
cex1 = 0.9;
# Scale-free topology fit index as a function of the soft-thresholding powerplot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")Constructing the gene network and identifying modules is now a simple function call:
pw<-12
net = blockwiseModules(datExpr0,
power = pw,
networkType="unsigned",
# TOMType = "unsigned",
minModuleSize = 30,
reassignThreshold = 0,
mergeCutHeight = 0.25,
numericLabels = TRUE,
# pamRespectsDendro = FALSE,
saveTOMs = TRUE,
saveTOMFileBase = "femaleMouseTOM",
verbose = 3)## Calculating module eigengenes block-wise from all genes
## Flagging genes and samples with too many missing values...
## ..step 1
## ..Working on block 1 .
## TOM calculation: adjacency..
## ..will not use multithreading.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 1 into file femaleMouseTOM-block.1.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 1 genes from module 1 because their KME is too low.
## ..merging modules that are too close..
## mergeCloseModules: Merging modules whose distance is less than 0.25
## Calculating new MEs...
##
## 0 1 2 3 4 5 6 7 8 9
## 3 378 203 160 119 93 64 52 48 42
We have chosen the soft thresholding power 12 , a relatively large minimum module size of 30, and a medium sensitivity (deepSplit=2) to cluster splitting. The parameter mergeCutHeight is the threshold for merging of modules. We have also instructed the function to return numeric, rather than color, labels for modules, and to save the Topological Overlap Matrix. The output of the function may seem somewhat cryptic, but it is easy to use. For example, net\(colors contains the module assignment, and net\)MEs contains the module eigengenes of the modules.
The dendrogram can be displayed together with the color assignment using the following code:
# open a graphics window
sizeGrWindow(12, 9)
# Convert labels to colors for plotting
mergedColors = labels2colors(net$colors)
# Plot the dendrogram and the module colors underneathplotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)We note that if the user would like to change some of the tree cut, module membership, and module merging criteria, the package provides the function recutBlockwiseTrees that can apply modified criteria without having to recompute the network and the clustering dendrogram. This may save a sub-stantial amount of time. We now save the module assignment and module eigengene information necessary for subsequent analysis.
In this analysis we would like to identify modules that are significantly associated with the measured clinical traits. Since we already have a summary profile (eigengene) for each module, we simply correlate eigengenes with external traits and look for the most significant associations
# Define numbers of genes and samples
nGenes = ncol(datExpr0)
nSamples = nrow(datExpr0)
# Recalculate MEs with color labels
MEs0 = moduleEigengenes(datExpr0, moduleColors)$eigengenes
MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, as.numeric(datTraits$Group_num), use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)Since we have a moderately large number of modules and traits, a suitable graphical representation will help in reading the table. We color code each association by the correlation value:
# Will display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3));
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datTraits)[4],
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = greenWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = 0.5,
zlim = c(-1,1),
main = paste("Module-trait relationships"))The analysis identifies the 1 significant module–trait associations. We will concentrate on GRUP as the trait of interest.
We quantify associations of individual genes with our trait of interest by defining Gene Significance GS as (the absolute value of) the correlation between the gene and the trait. For each module, we also define a quantitative measure of module membership MM as the correlation of the module eigengene and the gene expression profile. This allows us to quantify the similarity of all proteins on the array to every module
# Define variable weight containing the weight column of datTrait
grup = as.data.frame(as.numeric(datTraits$Group_num));
names(grup) = "grup"
# names (colors) of the modules
modNames = substring(names(MEs), 3)
geneModuleMembership = as.data.frame(cor(datExpr0, MEs, use = "p"));
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
names(geneModuleMembership) = paste("MM", modNames, sep="")
names(MMPvalue) = paste("p.MM", modNames, sep="")
geneTraitSignificance = as.data.frame(cor(datExpr0, grup, use = "p"))
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples))
names(geneTraitSignificance) = paste("GS.", names(grup), sep="")
names(GSPvalue) = paste("p.GS.", names(grup), sep="")##3.c Intramodular analysis: identifying genes with high GS and MM
Using the GS and MM measures, we can identify genes that have a high significance for weight as well as high module membership in interesting modules. As an example, we look at the brown module that has the highest association with weight. We plot a scatterplot of Gene Significance vs. Module Membership in the significant modules
moduls_int<-gsub("ME","",rownames(moduleTraitPvalue)[order(moduleTraitPvalue,decreasing = F)])
module = moduls_int[1]
column = match(module, modNames);
moduleGenes = moduleColors==module;
sizeGrWindow(7, 7)
par(mfrow = c(1,1))verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
abs(geneTraitSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Gene significance for body weight",
main = paste("Module membership vs. gene significance\n"),
cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)module = moduls_int[2]
column = match(module, modNames);
moduleGenes = moduleColors==module;
sizeGrWindow(7, 7)
par(mfrow = c(1,1))verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
abs(geneTraitSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Gene significance for body weight",
main = paste("Module membership vs. gene significance\n"),
cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)GS and MM are correlated, illustrating that proteins highly significantly associated with a trait are often also the most important (central) elements of modules associated with the trait. The reader is encouraged to try this code with other significance trait/module correlation.
We have found modules with high association with our trait of interest, and have identified their central players by the Module Membership measure. We now merge this statistical information with protein annotation.
gene<-colnames(datExpr0)[moduleColors==moduls_int[1]]
gene<-unlist(strsplit(gene,"\r\n"))
kegg<-enrichKEGG(gene,
organism = "hsa",
keyType = "uniprot",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
qvalueCutoff = 0.2,
use_internal_data = FALSE)
assign(paste0("kegg_",moduls_int[1]),kegg)
library(org.Hs.eg.db)
go <- enrichGO(gene = gene,
OrgDb = org.Hs.eg.db,
ont = "BP",keyType = "UNIPROT",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
assign(paste0("go_",moduls_int[1]),go)
gene<-colnames(datExpr0)[moduleColors==moduls_int[2]]
gene<-unlist(strsplit(gene,"\r\n"))
kegg<-enrichKEGG(gene,
organism = "hsa",
keyType = "uniprot",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
qvalueCutoff = 0.2,
use_internal_data = FALSE)
assign(paste0("kegg_",moduls_int[2]),kegg)
go <- enrichGO(gene = gene,
OrgDb = org.Hs.eg.db,
ont = "BP",keyType = "UNIPROT",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
assign(paste0("go_",moduls_int[2]),go)## ## KEGG
datatable(get(paste0("kegg_",moduls_int[1]))@result %>% filter(p.adjust<=0.05),caption = moduls_int[1],
filter = "top",
extensions = c('Buttons','FixedColumns'),
options = list(
dom = 'Blfrtip',
scrollX = TRUE,
scrollCollapse = TRUE,
buttons = c('copy', 'csv', 'excel'),
pageLength=5,
lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))## ## GO
datatable(get(paste0("go_",moduls_int[1]))@result%>% filter(p.adjust<=0.05),caption = moduls_int[1],
filter = "top",
extensions = c('Buttons','FixedColumns'),
options = list(
dom = 'Blfrtip',
scrollX = TRUE,
scrollCollapse = TRUE,
buttons = c('copy', 'csv', 'excel'),
pageLength=5,
lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))## ## KEGG
datatable(get(paste0("kegg_",moduls_int[2]))@result %>% filter(p.adjust<=0.05),caption = moduls_int[2],
filter = "top",
extensions = c('Buttons','FixedColumns'),
options = list(
dom = 'Blfrtip',
scrollX = TRUE,
scrollCollapse = TRUE,
buttons = c('copy', 'csv', 'excel'),
pageLength=5,
lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))## ## GO
datatable(get(paste0("go_",moduls_int[2]))@result%>% filter(p.adjust<=0.05),caption = moduls_int[2],
filter = "top",
extensions = c('Buttons','FixedColumns'),
options = list(
dom = 'Blfrtip',
scrollX = TRUE,
scrollCollapse = TRUE,
buttons = c('copy', 'csv', 'excel'),
pageLength=5,
lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))universe<-colnames(datExpr0)
gene<-colnames(datExpr0)[moduleColors==moduls_int[1]]
gene<-unlist(strsplit(gene,"\r\n"))
kegg<-enrichKEGG(gene,
universe = universe,
organism = "hsa",
keyType = "uniprot",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
qvalueCutoff = 0.2,
use_internal_data = FALSE)
assign(paste0("kegg_",moduls_int[1]),kegg)
library(org.Hs.eg.db)
go <- enrichGO(gene = gene,
universe = universe,
OrgDb = org.Hs.eg.db,
ont = "BP",keyType = "UNIPROT",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
assign(paste0("go_",moduls_int[1]),go)
gene<-colnames(datExpr0)[moduleColors==moduls_int[2]]
gene<-unlist(strsplit(gene,"\r\n"))
kegg<-enrichKEGG(gene,
organism = "hsa",
universe = universe,
keyType = "uniprot",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
qvalueCutoff = 0.2,
use_internal_data = FALSE)
assign(paste0("kegg_",moduls_int[2]),kegg)
go <- enrichGO(gene = gene,
universe = universe,
OrgDb = org.Hs.eg.db,
ont = "BP",keyType = "UNIPROT",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
assign(paste0("go_",moduls_int[2]),go)## ## KEGG
datatable(get(paste0("kegg_",moduls_int[1]))@result %>% filter(p.adjust<=0.05),caption = moduls_int[1],
filter = "top",
extensions = c('Buttons','FixedColumns'),
options = list(
dom = 'Blfrtip',
scrollX = TRUE,
scrollCollapse = TRUE,
buttons = c('copy', 'csv', 'excel'),
pageLength=5,
lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))## ## GO
datatable(get(paste0("go_",moduls_int[1]))@result%>% filter(p.adjust<=0.05),caption = moduls_int[1],
filter = "top",
extensions = c('Buttons','FixedColumns'),
options = list(
dom = 'Blfrtip',
scrollX = TRUE,
scrollCollapse = TRUE,
buttons = c('copy', 'csv', 'excel'),
pageLength=5,
lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))## ## KEGG
datatable(get(paste0("kegg_",moduls_int[2]))@result %>% filter(p.adjust<=0.05),caption = moduls_int[2],
filter = "top",
extensions = c('Buttons','FixedColumns'),
options = list(
dom = 'Blfrtip',
scrollX = TRUE,
scrollCollapse = TRUE,
buttons = c('copy', 'csv', 'excel'),
pageLength=5,
lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))## ## GO
datatable(get(paste0("go_",moduls_int[2]))@result%>% filter(p.adjust<=0.05),caption = moduls_int[2],
filter = "top",
extensions = c('Buttons','FixedColumns'),
options = list(
dom = 'Blfrtip',
scrollX = TRUE,
scrollCollapse = TRUE,
buttons = c('copy', 'csv', 'excel'),
pageLength=5,
lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))One way to visualize a weighted network is to plot its heatmap, Fig. 1. Each row and column of the heatmap correspond to a single gene. The heatmap can depict adjacencies or topological overlaps, with light colors denoting low adjacency (overlap) and darker colors higher adjacency (overlap). In addition, the gene dendrograms and module colors are plotted along the top and left side of the heatmap. The package provides a convenient function to create such network plots; Fig. 1 was created using the following code. This code can be executed only if the network was calculated using a single-block approach (that is, using the 1-step automatic or the step-by-step tutorials). If the networks were calculated using the block-wise approach, the user will need to modify this code to perform the visualization in each block separately. The modification is simple and we leave it as an exercise for the interested reader.
# Calculate topological overlap anew: this could be done more efficiently by saving the TOM
# calculated during module detection, but let us do it again here.
dissTOM = 1-TOMsimilarityFromExpr(datExpr0, power = 9);## TOM calculation: adjacency..
## ..will not use multithreading.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
# Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
plotTOM = dissTOM^7;
# Set diagonal to NA for a nicer plot
diag(plotTOM) = NA;
# Call the plot function
sizeGrWindow(9,9)# Recalculate module eigengenes
MEs = moduleEigengenes(datExpr0, moduleColors)$eigengenes
# Isolate weight from the clinical traits
grup = as.data.frame(as.numeric(datTraits$Group_num ))
names(grup) = "grup"
# Add the weight to existing module eigengenes
MET = orderMEs(cbind(MEs, grup))
# Plot the relationships among the eigengenes and the trait
sizeGrWindow(5,7.5);
par(cex = 0.9)plotEigengeneNetworks(MET, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle
= 90)The function produces a dendrogram of the eigengenes and trait(s), and a heatmap of their relationships. To split the dendrogram and heatmap plots, we can use the following code
# Plot the dendrogram
par(cex = 1.0)
plotEigengeneNetworks(MET, "Eigengene dendrogram", marDendro = c(0,4,2,0),
plotHeatmaps = F)# Plot the heatmap matrix (note: this plot will overwrite the dendrogram plot)
par(cex = 1.0)
plotEigengeneNetworks(MET, "Eigengene adjacency heatmap", marHeatmap = c(3,4,2,2),
plotDendrograms = FALSE, xLabelsAngle = 90)TOM <-1-dissTOM
modules = moduls_int
probes = colnames(datExpr0)
inModule = is.finite(match(moduleColors, modules));
modProbes = probes[inModule];
# modGenes = annot$gene_symbol[match(modProbes, annot$substanceBXH)];
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule];
dimnames(modTOM) = list(modProbes, modProbes)
dimnames(modTOM) = list(modProbes, modProbes)
# Export the network into edge and node list files Cytoscape can read
cyt = exportNetworkToCytoscape(modTOM,
edgeFile = paste("CytoscapeInput-edges-", paste(modules, collapse="-"), ".txt", sep=""),
nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""),
weighted = TRUE,
threshold = 0.02,
nodeNames = modProbes,
# altNodeNames = modGenes,
nodeAttr = moduleColors[inModule])library(igraph)
adj <- modTOM
adj[adj > 0.3] = 1
# dim(adj)
adj[adj != 1] = 0
network <- graph.adjacency(adj)
network <- simplify(network) # removes self-loops
results <- net
V(network)$color <- moduleColors[inModule]
V(network)$size<-6
par(mar=c(0,0,0,0))
# remove unconnected nodes
network <- delete.vertices(network, degree(network)==0)
igraph::plot.igraph(network,
# mark.groups = T,
layout=layout.fruchterman.reingold(network),
edge.arrow.size = 0.1)